#coding:utf-8
import scrapy
import json
import csvout

class QuotesSpider(scrapy.Spider):
    """Report how many results a Stack Overflow search returns.

    Spider arguments (read with ``getattr`` in ``start_requests``):

    * ``tag`` -- the search text.  When it is missing no request is made.
    * ``tid`` -- read and logged for debugging only; nothing else uses it.

    For every search page parsed, the count is handed to
    ``csvout.list2csv`` and also yielded as a ``{'stackoverflow_num': n}``
    item.
    """

    name = "stackoverflow"

    _SEARCH_URL = 'https://stackoverflow.com/search?q='

    # XPath copied from Chrome's dev tools; it points at the heading that
    # carries the result count on the search page.
    _RESULT_COUNT_XPATH = '//*[@id="mainbar"]/div[2]/h2/text()'

    _HEADERS = {
        # Original author's note: requests only started working once a
        # browser-like User-Agent was sent.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        # Only advertise encodings that can be decoded without optional
        # packages; a "br"/"sdch" body that cannot be decoded would make
        # the XPath in parse() match nothing.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # No explicit 'Host' header: it used to be hard-coded to
        # 'www.baidu.com', which does not match the requested site.  The
        # HTTP client derives the correct value from the request URL.
        'Upgrade-Insecure-Requests': '1',
    }

    def start_requests(self):
        """Yield the single search request for the ``tag`` spider argument.

        Yields nothing when ``tag`` was not supplied.  ``tag`` is expected
        as plain text; it is percent-encoded here, so callers should not
        pre-encode it.
        """
        # Function-scope import keeps the module's import block untouched
        # and works on both Python 2 and Python 3.
        try:
            from urllib.parse import quote_plus
        except ImportError:
            from urllib import quote_plus

        tag = getattr(self, 'tag', None)
        tid = getattr(self, 'tid', None)
        self.logger.debug('tid=%s', tid)

        if tag is None:
            self.logger.warning('no "tag" argument given; nothing to crawl')
            return

        # Escape the tag so characters such as '+', '&', '#' or spaces
        # (e.g. the tag "c++") reach the server as part of the query.
        url = self._SEARCH_URL + quote_plus(tag)
        yield scrapy.Request(url, headers=self._HEADERS, callback=self.parse)

    def parse(self, response):
        """Extract the result count from a search page.

        Passes ``['stackoverflow_num', count, response.url]`` to
        ``csvout.list2csv`` and yields ``{'stackoverflow_num': count}``.
        When the heading is missing or contains no digits, a warning is
        logged and nothing is written or yielded.
        """
        heading = response.xpath(self._RESULT_COUNT_XPATH).extract_first()
        num = self._count_from_heading(heading)
        if num is None:
            self.logger.warning(
                'could not read a result count from %s (heading=%r)',
                response.url, heading)
            return

        csvout.list2csv(['stackoverflow_num', num, response.url])
        yield {
            'stackoverflow_num': num
        }

    @staticmethod
    def _count_from_heading(text):
        """Return the integer formed by the digits in ``text``, or None.

        Non-digit characters (thousands separators, words, whitespace) are
        dropped.  ``None`` is returned when ``text`` is ``None`` or holds
        no usable digits.
        """
        if text is None:
            return None
        # join() instead of filter(): on Python 3 filter() returns an
        # iterator, which int() cannot parse.
        digits = ''.join(ch for ch in text if ch.isdigit())
        try:
            return int(digits)
        except ValueError:
            # Empty string, or characters isdigit() accepts but int() does not.
            return None

